---
title: "goodsports_replication_file"
author: "Alex Tolkin"
date: "2025-05-16"
output: pdf_document
---

INIITIAL DATA IMPORT

```{r setup, echo=FALSE}
# load packages
pacman::p_load(haven, psych, modelsummary, dplyr, kableExtra, ggplot2, estimatr, sjlabelled, fabricatr, sensemakr, fixest, rstatix, tidyr, ggrepel, ggtext, emmeans, ggsignif, tinytable)

# load panel data
wave_6 <- read_dta("data/amerispeak/250523wave6.dta")
wave_7 <- read_dta("data/amerispeak/250523wave7.dta")

#### PRE ELECTION LEGIT ####
wave_6$leg1_corrected <- 
  ifelse(wave_6$LEG1_W6 < 5, wave_6$LEG1_W6, NA) # correct direction
wave_6$leg2_corrected <- 
  ifelse(wave_6$LEG2_W6 < 5, wave_6$LEG2_W6, NA) # correct direction
wave_6$leg3_corrected <- 
  ifelse(wave_6$LEG3_W6 < 5, wave_6$LEG3_W6, NA) # correct direction
wave_6$leg4_corrected <- 
  ifelse(wave_6$LEG4_W6 < 5, 5 - wave_6$LEG4_W6, NA) # reverse
wave_6$leg5_corrected <- 
  ifelse(wave_6$LEG5_W6 < 5, wave_6$LEG5_W6, NA) # correct direction

# get alpha for this
# psych::alpha(wave_6[,c("leg1_corrected", "leg2_corrected", "leg3_corrected",
#                        "leg4_corrected", "leg5_corrected")], check.keys = FALSE) # 0.42
# doesn't really hold together, so just examine within-person change

wave_6$pre_electionlegit <- 
  rowMeans(wave_6[,c("leg1_corrected", "leg2_corrected", "leg3_corrected",
                     "leg4_corrected", "leg5_corrected")])

#### Sports consumption ####
wave_6$football <- 
  ifelse(wave_6$SPORTS_1_W6_R < 6, 6 - wave_6$SPORTS_1_W6_R, NA)
wave_6$basketball <- 
  ifelse(wave_6$SPORTS_2_W6_R < 6, 6 - wave_6$SPORTS_2_W6_R, NA)
wave_6$baseball <- 
  ifelse(wave_6$SPORTS_3_W6_R < 6, 6 - wave_6$SPORTS_3_W6_R, NA)
wave_6$soccer <- 
  ifelse(wave_6$SPORTS_4_W6_R < 6, 6 - wave_6$SPORTS_4_W6_R, NA)
wave_6$hockey <- 
  ifelse(wave_6$SPORTS_5_W6_R < 6, 6 - wave_6$SPORTS_5_W6_R, NA)

# OTHER SPORTS 
wave_6$autoracing <- 
  ifelse(wave_6$SPORTS_6_W6_R < 6, 6 - wave_6$SPORTS_6_W6_R, NA)
wave_6$tennis <- 
  ifelse(wave_6$SPORTS_7_W6_R < 6, 6 - wave_6$SPORTS_7_W6_R, NA)
wave_6$golf <- 
  ifelse(wave_6$SPORTS_8_W6_R < 6, 6 - wave_6$SPORTS_8_W6_R, NA)
wave_6$boxingmma <- 
  ifelse(wave_6$SPORTS_9_W6_R < 6, 6 - wave_6$SPORTS_9_W6_R, NA)

# get alpha for team sports
# psych::alpha(wave_6[,c("football", "basketball",
#                      "baseball", "soccer",
#                      "hockey")], check.keys = FALSE) # 0.75

wave_6$team_sports <- 
  rowMeans(wave_6[,c("football", "basketball",
                     "baseball", "soccer",
                     "hockey")],
           na.rm = TRUE)

#### Wave 6 Demographics ####
wave_6$female <- ifelse(wave_6$GENDER_W6 == 2, 1, 0)
wave_6$white <- ifelse(wave_6$RACETHNICITY_W6 == 1, 1, 0)
wave_6$nonwhite <- ifelse(wave_6$RACETHNICITY_W6 == 1, 0, 1)
wave_6$black <- ifelse(wave_6$RACETHNICITY_W6 == 2, 1, 0)
wave_6$hispanic <- ifelse(wave_6$RACETHNICITY_W6 == 4, 1, 0)
wave_6$asian <- ifelse(wave_6$RACETHNICITY_W6 == 6, 1, 0)
wave_6$othermulti <- ifelse(wave_6$RACETHNICITY_W6 %in% c(3, 5), 1, 0)
wave_6$age_7 <-  scales::rescale(as.numeric(wave_6$AGE7_W6))
wave_6$education_4 <- scales::rescale(as.numeric(wave_6$EDUC4_W6))
wave_6$income_18 <- scales::rescale(as.numeric(wave_6$INCOME_W6))
wave_6$trump_therm <-scales::rescale(ifelse(wave_6$THERMTRUMP_W6 < 500, wave_6$THERMTRUMP_W6, NA))
wave_6$biden_therm <- scales::rescale(ifelse(wave_6$THERM5_W6 < 500, wave_6$THERM5_W6, NA))
wave_6$partyid_5 <- scales::rescale(ifelse(wave_6$PARTYID5_W6 < 10, wave_6$PARTYID5_W6, NA))
wave_6$democrat <- ifelse(wave_6$PARTYID5_W6 %in% c(1,2), 1, 0)
wave_6$republican <- ifelse(wave_6$PARTYID5_W6 %in% c(4,5), 1, 0)
wave_6$married <- ifelse(wave_6$MARITAL_W6 == 1, 1, 0)
wave_6$disc_black <- ifelse(wave_6$DIS1_W6 < 6, 6 - wave_6$DIS1_W6, NA)
wave_6$disc_hisp <- ifelse(wave_6$DIS2_W6 < 6, 6 - wave_6$DIS2_W6, NA)
wave_6$disc_white <- ifelse(wave_6$DIS3_W6 < 6, 6 - wave_6$DIS3_W6, NA)
wave_6$disc_men <- ifelse(wave_6$DIS5_W6 < 6, 6 - wave_6$DIS5_W6, NA)
wave_6$disc_christ <- ifelse(wave_6$DIS7_W6 < 6, 6 - wave_6$DIS7_W6, NA)
wave_6$pol_interest <- scales::rescale(ifelse(wave_6$Q16_W6 < 5, 5 - wave_6$Q16_W6, NA))

#### POST ELECTION LEGIT ####
wave_7$leg1_corrected <- 
  ifelse(wave_7$LEG1_W7 < 5, wave_7$LEG1_W7, NA) # correct direction
wave_7$leg2_corrected <- 
  ifelse(wave_7$LEG2_W7 < 5, wave_7$LEG2_W7, NA) # correct direction
wave_7$leg3_corrected <- 
  ifelse(wave_7$LEG3_W7 < 5, wave_7$LEG3_W7, NA) # correct direction
wave_7$leg4_corrected <- 
  ifelse(wave_7$LEG4_W7 < 5, 5 - wave_7$LEG4_W7, NA) # reverse
wave_7$leg5_corrected <- 
  ifelse(wave_7$LEG5_W7 < 5, wave_7$LEG5_W7, NA) # correct direction

wave_7$post_electionlegit <- 
  rowMeans(wave_7[,c("leg1_corrected", "leg2_corrected",
                                     "leg3_corrected", "leg4_corrected",
                                     "leg5_corrected")])

# psych::alpha(wave_7[,c("leg1_corrected", "leg2_corrected", "leg3_corrected",
#                         "leg4_corrected", "leg5_corrected")], 
#              check.keys = FALSE) #.68


#### MEDIA CONSUMPTION ####
# get the variables you are looking for when it comes to Fox news consumption
# rename all variables
wave_7 <- wave_7 %>% dplyr::mutate(
  thefive = ME2A_9_W7,
  hannity = ME2A_11_W7,
  ingrahm_angle = ME2A_12_W7,
  bretbaier = ME2A_19_W7,
  tuckercarlson = ME2A_21_W7,
  foxandfriends = ME2B_8_W7,
  foxnewsnight = ME2B_9_W7,
  gregkelly = ME2B_10_W7,
  judgejeanine = ME2B_12_W7,
  lifelibertylevin = ME2B_13_W7,
  outnumbered = ME2B_16_W7,
  neilcavuto = ME2B_19_W7,
  brianwilliams = ME2A_1_W7, #
  davidmuir = ME2A_2_W7, # 
  chrishayes = ME2A_3_W7, #
  americasnewsroom = ME2A_4_W7, #
  arimelber = ME2A_5_W7, #
  cbsevening = ME2A_6_W7, #
  cbsmorning = ME2A_7_W7, #
  deadlinewhitehouse = ME2A_8_W7, # 
  gma = ME2A_10_W7, #
  lawrenceo = ME2A_13_W7, #
  tapper = ME2A_14_W7, #
  reid = ME2A_15_W7, #
  lesterholt = ME2A_16_W7, #
  today = ME2A_17_W7, #
  maddow = ME2A_18_W7, #
  maccallum = ME2A_20_W7, #
  sixtyminutes = ME2B_1_W7, #
  americareports = ME2B_2_W7, #
  andersoncooper = ME2B_3_W7, #
  donlemon = ME2B_4_W7, #
  cuomo = ME2B_5_W7, #
  erinburnett = ME2B_6_W7, #
  facethenation = ME2B_7_W7, #
  kimmel = ME2B_11_W7, #
  colbert = ME2B_14_W7, #
  mtp = ME2B_15_W7, #
  newshour = ME2B_17_W7, #
  fallon = ME2B_18_W7, #
  theview = ME2B_20_W7
)
wave_7$fox_watching <-
  # for your scale here, you should scale by STANDARD DEVIATION
  scale(rowSums(wave_7[,c("thefive", "hannity", "ingrahm_angle",
  "bretbaier", "tuckercarlson", "foxandfriends", "foxnewsnight",
  "judgejeanine", "lifelibertylevin", "outnumbered", "neilcavuto")], na.rm = T))

wave_7$msnbc_watching <-
  scale(rowSums(wave_7[,c("arimelber", "brianwilliams", "chrishayes", 
                    "deadlinewhitehouse", "lawrenceo",
                    "reid", "maddow")], na.rm = T))

wave_7$cnn_watching <-
  scale(rowSums(wave_7[,c("andersoncooper", "donlemon", "tapper", "cuomo", 
                  "erinburnett")], na.rm = T))

wave_7$broadcast_watching <-
  scale(rowSums(wave_7[,c("davidmuir", "cbsevening", "cbsmorning", "gma", "today", 
                    "facethenation", "mtp", "theview", "newshour", "lesterholt",
                    "sixtyminutes")], na.rm = T))

wave_7$newsmax <-
  scale(rowSums(wave_7[,c("gregkelly")], na.rm = T))
```

VIEWERSHIP COMPARED TO NEWS

```{r fig_3_viewership, echo=FALSE}
deviations <- wave_7 |> 
  select(starts_with("ME2") | PartyID7_W7) |>
  select(!matches("F|K")) |> # remove don't knows and stuff
  select(!starts_with("ME2A_22")) |> # remove unspecified shows like "other"
  select(!starts_with("ME2B_21")) |>
  rename(
    brianwilliams = ME2A_1_W7, #
    davidmuir = ME2A_2_W7, # 
    chrishayes = ME2A_3_W7, #
    americasnewsroom = ME2A_4_W7, #
    arimelber = ME2A_5_W7, #
    cbsevening = ME2A_6_W7, #
    cbsmorning = ME2A_7_W7, #
    deadlinewhitehouse = ME2A_8_W7, # 
    thefive = ME2A_9_W7, #
    gma = ME2A_10_W7, #
    hannity = ME2A_11_W7, #
    ingrahm_angle = ME2A_12_W7, #
    lawrenceo = ME2A_13_W7, #
    tapper = ME2A_14_W7, #
    reid = ME2A_15_W7, #
    lesterholt = ME2A_16_W7, #
    today = ME2A_17_W7, #
    maddow = ME2A_18_W7, #
    bretbaier = ME2A_19_W7, #
    maccallum = ME2A_20_W7, #
    tuckercarlson = ME2A_21_W7, #
    sixtyminutes = ME2B_1_W7, #
    americareports = ME2B_2_W7, #
    andersoncooper = ME2B_3_W7, #
    donlemon = ME2B_4_W7, #
    cuomo = ME2B_5_W7, #
    erinburnett = ME2B_6_W7, #
    facethenation = ME2B_7_W7, #
    foxandfriends = ME2B_8_W7, #
    foxnewsnight = ME2B_9_W7, #
    gregkelly = ME2B_10_W7, #
    kimmel = ME2B_11_W7, #
    judgejeanine = ME2B_12_W7, #
    lifelibertylevin = ME2B_13_W7, #
    colbert = ME2B_14_W7, #
    mtp = ME2B_15_W7, #
    outnumbered = ME2B_16_W7, #
    newshour = ME2B_17_W7, #
    fallon = ME2B_18_W7, #
    neilcavuto = ME2B_19_W7, #
    theview = ME2B_20_W7) |>
  filter(PartyID7_W7 < 8) # remove don't knows from pid

# set all 0s to NA
deviations[deviations == 0] <- NA

# multiply everyone who has seen a show with their PID

# multiply all columns in deviations by the partyid column
deviations_adj <- deviations[,-ncol(deviations)] * as.vector(deviations$PartyID7_W7)

# get the means
tv_mean <- sapply(deviations_adj, mean, na.rm = T)

# get the variances
tv_var <- sapply(deviations_adj, var, na.rm = T)

# now moving on to wave 6 sports
wave_6$pid7 <- ifelse(wave_6$PARTYID7_NEW_W6 < 10, wave_6$PARTYID7_NEW_W6, NA)
wave_6$football <- 
  ifelse(wave_6$SPORTS_1_W6_R < 6, 6 - wave_6$SPORTS_1_W6_R, NA)
wave_6$basketball <- 
  ifelse(wave_6$SPORTS_2_W6_R < 6, 6 - wave_6$SPORTS_2_W6_R, NA)
wave_6$baseball <- 
  ifelse(wave_6$SPORTS_3_W6_R < 6, 6 - wave_6$SPORTS_3_W6_R, NA)
wave_6$soccer <- 
  ifelse(wave_6$SPORTS_4_W6_R < 6, 6 - wave_6$SPORTS_4_W6_R, NA)
wave_6$hockey <- 
  ifelse(wave_6$SPORTS_5_W6_R < 6, 6 - wave_6$SPORTS_5_W6_R, NA)

# restrict to people who watch each sport several times a month
deviations_w6_tougher <- wave_6 |> 
  select(football, basketball, baseball, soccer, hockey)
deviations_w6_tougher <- ifelse(deviations_w6_tougher > 2, 1, NA)

# multiply all columns in deviations df by the partyid column to get PID of respondents
# who watch each sport multiple times a month
deviations_w6_adj_tougher <- as.data.frame(deviations_w6_tougher * wave_6$pid7)
sports_mean_tougher <- sapply(deviations_w6_adj_tougher, mean, na.rm = T)

# sort the means
combined_mean_tougher <- sort(c(tv_mean, sports_mean_tougher), decreasing = T)
means_tougher <- data.frame(combined_mean_tougher)
means_tougher$label <- row.names(means_tougher)

# calculate average mean for the entire sample
avg_mean_w7 <- mean(wave_7$PartyID7_W7, na.rm = T) # 3.865271

# group television programs by category
means_tougher$category <-
  case_when(
    means_tougher$label %in% c("arimelber", "brianwilliams", "chrishayes", "deadlinewhitehouse", 
                           "lawrenceo", "reid", "maddow") ~ "MSNBC",
    means_tougher$label %in% c("andersoncooper", "donlemon", "tapper", "cuomo", 
                           "erinburnett") ~ "CNN",
    means_tougher$label %in% c("thefive", "hannity", "ingrahm_angle", "americasnewsroom",
                           "bretbaier", "tuckercarlson", "foxandfriends", "foxnewsnight",
                           "judgejeanine", "lifelibertylevin", "outnumbered", 
                           "neilcavuto", "maccallum", "americareports") ~ "FOX",
    means_tougher$label %in% c("davidmuir", "cbsevening", "cbsmorning", "gma", "today", 
                           "facethenation", "mtp", "theview", "newshour", "lesterholt",
                           "sixtyminutes") ~ "Broadcast News",
    means_tougher$label %in% c("fallon", "kimmel", "colbert") ~ "Late Night",
    means_tougher$label %in% c("gregkelly") ~ "Newsmax",
    means_tougher$label %in% c("baseball", "football", "basketball",
                           "hockey", "soccer") ~ "Sports"
  )

# construct figure with mean PID for all programs
png(filename = "Means_per_show_tougher.png",
    width = 18, height = 18, units = "cm", type = "cairo-png", res = 300)
means_tougher %>%
  arrange(combined_mean_tougher) %>%    # First sort by val. This sort the dataframe but NOT the factor levels
  mutate(label=factor(label, levels=label)) %>%   # This trick update the factor levels
  ggplot(aes(x=combined_mean_tougher, y=label, color = category, shape = category)) +
  geom_point() +
  scale_shape_manual(values = c(0:5, 19)) +
  scale_color_brewer(type = "qual", palette = "Dark2") +
  geom_vline(xintercept = avg_mean_w7, linetype = "dotted") +
  labs(x = "Average Partisanship of Viewer", y = "",
       caption = "Points represent the average 7-level party identification of viewers of the television program. Dotted line represents mean party identification among all respondents. The average audience for every sport is more politically moderate than the average audience for every news program.") +
  scale_x_continuous(breaks = c(1:7),
                     labels = c("Strong\n Democrat",
                       "Moderate\n Democrat", "Leaning\n Democrat",
                                "Independent", "Leaning\n Republican",
                                "Moderate\n Republican", "Strong\n Republican")) +
  geom_text_repel(aes(label = label), 
                  data = as.data.frame(means_tougher %>% filter(category == "Sports")), 
                  size = 3, box.padding = 1, show.legend = FALSE,
                  color = 'black') +
  theme_minimal() +  
  theme(plot.caption = element_textbox_simple(margin = margin(8, 0, 0, 0), size = 10),
        axis.title=element_text(size=11, face = "bold"),
        axis.text.y=element_blank(),
        axis.ticks.x = element_blank(),
        legend.title = element_blank())
dev.off()
```

PANEL STUDY ANALYSIS

```{r merge_waves_long, echo=FALSE}
# Select the appropriate columns, then replace the election legitimacy for
# wave 6 with the legitimacy for wave 7. Rbind the two DFs together
wave_6$team_sports <- 
  rowMeans(wave_6[,c("football", "basketball",
                     "baseball", "soccer",
                     "hockey")],
           na.rm = TRUE)

# "zero-sum" sports only (used in appendix)
wave_6$zs_sports <- 
  rowMeans(wave_6[,c("football", "basketball",
                     "baseball", "soccer",
                     "hockey", "tennis", "boxingmma")],
           na.rm = TRUE)

# all sports (used in appendix)
wave_6$sports_all <- 
  rowMeans(wave_6[,c("football", "basketball",
                     "baseball", "soccer",
                     "hockey", "tennis", "golf",
                     "boxingmma", "autoracing")],
           na.rm = TRUE)

wave_6_tomerge <- 
  wave_6 %>% dplyr::select(CaseId, team_sports, zs_sports, sports_all,
                    football, basketball, baseball, soccer, hockey,
                    tennis, golf, boxingmma, autoracing,
                    female, white, nonwhite, black, hispanic,
                    asian, age_7, education_4, income_18,
                    republican, democrat, pol_interest,
                    pre_electionlegit, QVOTE_W6, married,
                    disc_black, disc_white, disc_hisp, disc_men, disc_christ)


wave_7_tomerge <-
  wave_7 %>% dplyr::select(CaseId, post_electionlegit, fox_watching,
                           msnbc_watching, cnn_watching, broadcast_watching,
                           newsmax)

# what you want to do now is inner join them, then duplicate, then change 
# variable names as appropriate
merged_w6 <- inner_join(wave_6_tomerge, wave_7_tomerge, by="CaseId")
merged_w7 <- merged_w6

# do the appropriate variable renaming corrections
merged_w6$wave <- 6
merged_w7$wave <- 7
merged_w6$electionlegit <- merged_w6$pre_electionlegit
merged_w7$electionlegit <- merged_w7$post_electionlegit

# final dataset for analysis
wave_67 <- bind_rows(merged_w6, merged_w7)

# belatedly create trump voter variable
wave_67$trump_voter <-
  ifelse(wave_67$QVOTE_W6 == 1, 1, 0)
wave_67$biden_voter <-
  ifelse(wave_67$QVOTE_W6 == 2, 1, 0)
wave_67$inc_nonvoters <-
  case_when(wave_67$trump_voter == 1 ~ "Trump Voter",
            wave_67$biden_voter == 1 ~ "Biden Voter",
            TRUE ~ "Non-Voter")
wave_67$voters_only <-
  ifelse(wave_67$QVOTE_W6 %in% c(1,2), 1, 0)

# make our indexes for perceived discrimination (appendix)
wave_67$status_threat <- 
  rowMeans(wave_67 %>% dplyr::select(disc_white, disc_men, disc_christ))
wave_67$racial_resentment <- 
  rowMeans(wave_67 %>% dplyr::select(disc_black, disc_hisp))

```

```{r fig_4_main_results, echo=FLSE}}
# this figure shows change in perceived election legitimacy by sports watching.
# Do High-Low split comparison for easier visualization
# make the terciles for study 1 visualization

wave_67_for_visual <- wave_67 %>% 
  filter(!is.na(sports_all)) %>%
  filter(!is.na(pre_electionlegit)) %>%
  mutate(ts_split = split_quantile(team_sports, 2))

hause_h1 <-
  hausekeep::seWithin(data = 
                        wave_67_for_visual[wave_67_for_visual$voters_only == 1,],
           measurevar = "electionlegit",
           betweenvars = c("ts_split",
                           "trump_voter"),
           withinvars = "wave",
           idvar = "CaseId")

# turn variables to factors
hause_h1$trump_biden <-
  ifelse(hause_h1$trump_voter == 1, "Trump Supporters",
         "Biden Supporters")
hause_h1$ts_split <-
  factor(hause_h1$ts_split)
hause_h1$wave <-
  factor(hause_h1$wave)

# create annotations
annotation_df <- data.frame(
  label = c("High Sports Watchers", "Low Sports Watchers",
            "High Sports Watchers", "Low Sports Watchers"),
  trump_biden = c("Trump Supporters", "Trump Supporters",
                  "Biden Supporters", "Biden Supporters"),
  x = c(1.65, 1.5, 1.55, 1.4),
  y = c(2.27, 1.91, 2.65, 2.96))

# rename the labels for the plot
facet_names <- c("Trump Supporters" = "Election Losers",
                 "Biden Supporters" = "Election Winners")

# plot figure
png(filename = "img/hypothesis1.png",
    width = 20, height = 12, units = "cm", type = "cairo-png", res = 300)
ggplot(hause_h1) +  
  geom_point(aes(x = wave, y = electionlegit, color = trump_biden)) +
  geom_line(aes(x = wave, y = electionlegit, 
                group = interaction(ts_split, trump_biden), 
                linetype = ts_split,
                color = trump_biden)) +
      geom_errorbar(aes(x = wave, y = electionlegit, ymax = electionlegit + ci,
           ymin = electionlegit - ci, 
           group = interaction(ts_split, trump_biden), 
           width = 0,
           color = trump_biden)) +
  labs(x = "",
       y = "Perceived Election Legitimacy 1-4",
       linetype = "Sports Watching",
       caption = "**Note:** Entries represent mean levels of Perceived Election Legitimacy before and after the 2020 election, among individuals supporting Trump or Biden. Higher scores reflect greater Perceived Election Legitimacy. Vertical bars represent 95% within-subject confidence intervals. The slope for those supporting the winning candidate increases, as expected. The post-election decline in legitimacy among those supporting the losing candidate is significantly less among high sports watchers relative to low sports watchers.") +
  scale_x_discrete(labels = c("Pre-election", "Post-election"),
                   expand = c(0.1,0.1)) +
  scale_color_manual(name = "Candidate Choice",
                     values = c("Biden Supporters" = "royalblue3", 
                                "Trump Supporters" = "red3")) +
  guides(linetype = "none") +
  theme_bw(15) +
  geom_text(
    data = annotation_df,
    mapping = aes(x = x, y = y, label = label)) +
  theme(strip.text.x = element_text(face = "bold", size = 15),
        axis.title.x = element_blank(),
        # use latex2exp and ggtext to enable markdown formatting in 
        # figure caption
        plot.caption = element_textbox_simple(hjust = 0,
                                              width = grid::unit(16, "cm"),
                                              size = 9,
                                              margin = margin(5,0,0,0)))
dev.off()
```

```{r table_1_main_results,echo=FALSE}
wave_67$team_sports_scaled <- scale(wave_67$team_sports)

# filter all NAs out so we are just looking at change
trump_only <- wave_67 %>% filter(QVOTE_W6 == 1) %>% 
  filter(team_sports > 0) %>%
  filter(pre_electionlegit > 0 & post_electionlegit > 0)
biden_only <- wave_67 %>% filter(QVOTE_W6 == 2) %>% 
  filter(team_sports > 0) %>% 
  filter(team_sports > 0) %>%
  filter(pre_electionlegit > 0 & post_electionlegit > 0)

trump_mod1 <- lm_robust(electionlegit~
              wave*team_sports_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
biden_mod1 <- lm_robust(electionlegit~
              wave*team_sports_scaled,
            data = biden_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")

cm_tab1 <- c("wave" = "Wave",
             "wave:team_sports_scaled" = "Wave x Sports Watching")

# use GOF map here to get the number of respondents instead of the number of responses
f <- function(x) format(round(x/2, 1), big.mark="") #big.mark removes the comma for thousands
gm <- list(
  list("raw" = "nobs", "clean" = "N", "fmt" = f))


modelsummary(models = list("Trump Voters" = trump_mod1, "Biden Voters" = biden_mod1),
             estimate = "{estimate} ({std.error}) {stars}",
             statistic = NULL,
             notes = list("* p < .05, ** p < .01, *** p < .001",
                          "All models use within-person fixed effects; sports watching measured in standard deviations"),
             stars = c("*" = .05, "**" = .01, "***" = .001), 
             coef_map = cm_tab1,
             align = "ldd",
             gof_map = gm,
             output = "latex")
```

```{r table_2_fox_comparison,echo=FALSE}
# this table contextualizes sports watching compared to Fox news

# same trump definition as above; restrict to respondents who have the data
trump_only <- wave_67 %>% filter(QVOTE_W6 == 1) %>% 
  filter(team_sports > 0) %>%
  filter(pre_electionlegit > 0 & post_electionlegit > 0)

# look at Trump voters change over time
trump_voters <- wave_67[wave_67$trump_voter == 1,]

# just look at fox news first
study1.foxmodel <- lm_robust(electionlegit~
                              fox_watching*wave, 
                                   data = trump_only, 
                                   fixed_effects = ~ CaseId,
                                   se_type = "HC1")
# sports among election losers
study1.sportsmodel <- lm_robust(electionlegit~
                              team_sports_scaled*wave, 
                                   data = trump_only, 
                                   fixed_effects = ~ CaseId,
                                   se_type = "HC1")
tab2_cm <-
  c("wave" = "Wave",
    "fox_watching:wave" = "Wave x Fox News Watching",
    "team_sports_scaled:wave" = "Wave x Sports Watching")

modelsummary(
  models = list("Fox News Watching" = study1.foxmodel,
                "Team Sports Watching" = study1.sportsmodel),
             title = "Change in Perceived Election Legitimacy among Trump Supporters",
             estimate = "{estimate} ({std.error}) {stars}",
             statistic = NULL,
             notes = list("* p < .05, ** p < .01, *** p < .001",
                          "All models use within-person fixed effects; sports and Fox watching measured in standard deviations"),
             stars = c("*" = .05, "**" = .01, "***" = .001), 
             coef_map = tab2_cm,
             align = "ldd",
             gof_map = gm,
             escape=FALSE,
             output = "latex")
```

```{r table_3_robustness, echo=FALSE}
# Table 3 testing a variety of alternative model specifications

# baseline model
robust_baseline <- 
  lm_robust(electionlegit~team_sports_scaled*wave, 
                                   data = trump_only, 
                                   fixed_effects = ~ CaseId,
                                   se_type = "HC1")

# include other media consumption
robust_media <- lm_robust(electionlegit ~ 
                      fox_watching*wave +
                               msnbc_watching*wave +
                               cnn_watching*wave +
                               broadcast_watching*wave + 
                               newsmax*wave +
                      team_sports_scaled*wave|
                      CaseId, 
                    data = trump_only,
                    fixed_effects = ~ CaseId,
                                   se_type = "HC1")

# include racial attitudes
robust_race <- lm_robust(electionlegit ~ 
                            status_threat*wave +
                      racial_resentment*wave +
                      team_sports_scaled*wave,
                      data = trump_only,
                    fixed_effects = ~ CaseId,
                                   se_type = "HC1")

# include demographics
robust_demos <- lm_robust(electionlegit ~ 
                      age_7*wave + 
                      female*wave +
                      nonwhite*wave +
                      education_4*wave +
                      income_18*wave +
                      team_sports_scaled*wave|
                      CaseId, 
                    data = trump_only,
                    fixed_effects = ~ CaseId,
                                   se_type = "HC1")

robust_cm <- 
    c("wave" = "Wave",
      "team_sports_scaled:wave" = "Sports x Wave",
      "wave:team_sports_scaled" = "Sports x Wave",
      "fox_watching:wave" = "Fox News x Wave",
      "wave:msnbc_watching" = "MSNBC x Wave",
      "wave:cnn_watching" = "CNN x Wave",
      "wave:broadcast_watching" = "Broadcast News x Wave",
      "wave:newsmax" = "Newsmax x Wave",
      "status_threat:wave" = "Status Threat x Wave",
      "wave:racial_resentment" = "Perceived Prejudice x Wave",
      "age_7:wave" = "Age x Wave",
      "wave:female" = "Female x Wave",
      "wave:nonwhite" = "Non-White x Wave",
      "wave:education_4" = "Education x Wave",
      "wave:income_18" = "Income x Wave")

modelsummary(models = list("Baseline" = robust_baseline,
                           "Including Media" = robust_media,
                           "Including Race/Status Attitudes" = robust_race,
                           "Including Demographics" = robust_demos),
             align = "ldddd",
             stars = T,
             fmt = fmt_sprintf("%.3f"),
             coef_map = robust_cm,
             gof_map = gm,
             title = "Models Predicting Change in Perceived Election Legitimacy Among Trump Supporters",
             output = "latex")
```

EXPERIMENT STUDY ANALYSIS

```{r experiment_setup, echo=FALSE}
#### IMPORT DATA ####
raw_qual <- read.csv("data/electionlegit_experiment/230626_results.csv")
# First 5 rows are test runs, remove
experiment_data <- raw_qual[-(1:5),]
# get party identification provided by survey company
demographics <- read.csv("data/electionlegit_experiment/respondent_party.csv")
# merge
data_merged <- inner_join(experiment_data, demographics, by = c("RESPONDENT_ID"="id"))

#### CONSTRUCT VARIABLES ####
## Main DV ##
data_merged$DV1 <- case_when( # NORMAL ORDER
  data_merged$Main_DV_1 == "Not at all likely" ~ 1,
  data_merged$Main_DV_1 == "Not too likely" ~ 2,
  data_merged$Main_DV_1 == "Somewhat likely" ~ 3,
  data_merged$Main_DV_1 == "Very likely" ~ 4
)
data_merged$DV2 <- case_when( # REVERSE
  data_merged$Main_DV_2 == "Not at all likely" ~ 4,
  data_merged$Main_DV_2 == "Not too likely" ~ 3,
  data_merged$Main_DV_2 == "Somewhat likely" ~ 2,
  data_merged$Main_DV_2 == "Very likely" ~ 1
)
data_merged$DV3 <- case_when( # NORMAL ORDER
  data_merged$Main_DV_3 == "Not at all likely" ~ 1,
  data_merged$Main_DV_3 == "Not too likely" ~ 2,
  data_merged$Main_DV_3 == "Somewhat likely" ~ 3,
  data_merged$Main_DV_3 == "Very likely" ~ 4
)
data_merged$DV4 <- case_when( # REVERSE
  data_merged$Main_DV_4 == "Not at all likely" ~ 4,
  data_merged$Main_DV_4 == "Not too likely" ~ 3,
  data_merged$Main_DV_4 == "Somewhat likely" ~ 2,
  data_merged$Main_DV_4 == "Very likely" ~ 1
)
data_merged$DV5 <- case_when( # REVERSE
  data_merged$Main_DV_5 == "Not at all likely" ~ 4,
  data_merged$Main_DV_5 == "Not too likely" ~ 3,
  data_merged$Main_DV_5 == "Somewhat likely" ~ 2,
  data_merged$Main_DV_5 == "Very likely" ~ 1
)
data_merged$DV6 <- case_when( # REVERSE
  data_merged$Main_DV_6 == "Not at all likely" ~ 4,
  data_merged$Main_DV_6 == "Not too likely" ~ 3,
  data_merged$Main_DV_6 == "Somewhat likely" ~ 2,
  data_merged$Main_DV_6 == "Very likely" ~ 1
)
data_merged$DV7 <- case_when( # REVERSE
  data_merged$Main_DV_7 == "Not at all likely" ~ 4,
  data_merged$Main_DV_7 == "Not too likely" ~ 3,
  data_merged$Main_DV_7 == "Somewhat likely" ~ 2,
  data_merged$Main_DV_7 == "Very likely" ~ 1
)
data_merged$DV8 <- case_when( # REVERSE
  data_merged$Main_DV_8 == "Not at all likely" ~ 4,
  data_merged$Main_DV_8 == "Not too likely" ~ 3,
  data_merged$Main_DV_8 == "Somewhat likely" ~ 2,
  data_merged$Main_DV_8 == "Very likely" ~ 1
)
# psych::alpha(data_merged %>% select(DV1, DV2, DV3, DV4, DV5, DV6, DV7, DV8))
# .84

data_merged$electionlegit <- 
  rowMeans(data_merged %>% 
             dplyr::select(DV1, DV2, DV3, DV4, DV5, DV6, DV7, DV8))

# create Trump feeling thermometer
data_merged$trump_therm <- 
  as.numeric(data_merged$feeling_therm_1)

# NOTE - you meant to operationalize feeling thermometer as TrumpTherm-BidenTherm
# but you preregistered it wrong, as just TrumpTherm. 
# So you are using that preregistered version.

# look at dem vs rep PID - higher numbers = REP
# Because we asked Forthright for no independents we have no pure independents,
# so 4 is both the middle of possible values and nobody scores a 4

data_merged$party <- ifelse(data_merged$political_party_preference < 4, 
                            "Democrat", "Republican")

# qualtrics stored info on condition assignment as strings; convert to numeric
data_merged$sports_concession <- as.numeric(data_merged$sports_treatment)
data_merged$political_concession <- as.numeric(data_merged$political_treatment)

```

```{r fig5_experiment_main_results, echo=FALSE}
# this figure shows changes by political and sports concession condition
# main results for the experiment

# run preregistered model
experiment.model <- lm_robust(electionlegit ~ sports_concession +
                                political_concession +
                                trump_therm + age + education,
                              data = data_merged, se="HC2")

# Use Emmeans to get the covariate-adjusted means
EMM1 <- as.data.frame(emmeans(experiment.model, ~ political_concession))
EMM1$type <- "Political Concession"
EMM1 <- EMM1 %>% 
  dplyr::rename(treatment = political_concession)
EMM2 <- as.data.frame(emmeans(experiment.model, ~ sports_concession))
EMM2$type <- "Sports Concession"
EMM2 <- EMM2 %>% 
  dplyr::rename(treatment = sports_concession)
combined_marginals <- rbind(EMM1, EMM2)
combined_marginals$treatment <- factor(combined_marginals$treatment,
                                          levels = c(0, 1),
                                          labels = c("No", "Yes"))

# create DF for annotating figure
annotation_df <- data.frame(
  type = c("Political Concession","Sports Concession"),
  start = c("No", "No"),
  end = c("Yes", "Yes"),
  y = c(combined_marginals[2,"upper.CL"] + .02, 
        combined_marginals[4,"upper.CL"] + .02),
  label = c("p < .001", "p < .05")
)

# construct figure
png(filename = "img/marginal_means.png",
    width = 20, height = 14, units = "cm", type = "cairo-png", res = 300)
ggplot(data = combined_marginals, aes(x = treatment, y = emmean)) +
  labs(x = "", y = "Perceived Election Legitimacy (1-4)", 
       caption = "**Note:** Entries represent results from an experiment randomly assigning a nationwide sample to see short excerpts from actual political and/or sports concession statements from the losers. Means are adjusted for covariates age, education and Trump feeling thermometer. Higher scores reflect greater Perceived Election Legitimacy among those whose favored candidate lost. Bars represent 95% confidence intervals. Both comparisons are significantly different from control conditions, with no significant interaction.
") +
  geom_point(size = 3) +
  ylim(2.37,2.67) +
  geom_errorbar(aes(ymax = upper.CL,
                    ymin = lower.CL),
                width = 0) +
  geom_signif(
    data = annotation_df,
    aes(xmin = start, xmax = end,
        annotations = label,
        y_position = y),
    tip_length = 0, manual = T) +
  facet_wrap(vars(type), ncol = 2) +
  theme_bw(15) +
  theme(strip.text.x = element_text(face = "bold", size = 15),
        plot.caption = element_textbox_simple(hjust = 0,
                                              width = grid::unit(17, "cm"),
                                              size = 9,
                                              margin = margin(0,0,0,0)))
dev.off()
```

APPENDIX

```{r table4_demographics_over_time}
# analyze demographic information for the respondents used in the panel study
# and make a table comparing demos to CPS at the time

#### CONVERT DEMOGRAPHIC INFORMATION TO CPS FORMAT ####
# income
wave_6$income_fourlevel <- case_when(wave_6$INCOME_W6 %in% c(1,2,3,4,5,6) ~ 1,
                                     wave_6$INCOME_W6 %in% c(7,8,9,10,11) ~ 2,
                                     wave_6$INCOME_W6 %in% c(12,13,14) ~ 3,
                                     wave_6$INCOME_W6 %in% c(15,16,17,18) ~ 4)

# age
wave_6$age_fourlevel <- case_when(wave_6$AGE_W6 < 35 ~ 1,
                                  wave_6$AGE_W6 >= 35 & wave_6$AGE_W6 < 50 ~ 2,
                                  wave_6$AGE_W6 >= 50 & wave_6$AGE_W6 < 65 ~ 3,
                                  wave_6$AGE_W6 >= 65 ~ 4)

# race
wave_6$race_fivelevel <- case_when(wave_6$RACETHNICITY_W6 == 1 ~ 1,
                                   wave_6$RACETHNICITY_W6 == 2 ~ 2,
                                   wave_6$RACETHNICITY_W6 == 4 ~ 3,
                                   wave_6$RACETHNICITY_W6 == 6 ~ 4,
                                   wave_6$RACETHNICITY_W6 %in% c(3,5) ~ 5)

# education
wave_6$edu_fivelevel <- case_when(wave_6$EDUC_W6 %in% c(1,2,3,4,5,6,7,8) ~ 1,
                                  wave_6$EDUC_W6 == 9 ~ 2,
                                  wave_6$EDUC_W6 %in% c(10,11) ~ 3,
                                  wave_6$EDUC_W6 == 12 ~ 4,
                                  wave_6$EDUC_W6 %in% c(13,14) ~ 5)

# sex
wave_6$sex <- case_when(wave_6$GENDER1_W6 == 1 ~ 1,
                        wave_6$GENDER1_W6 == 2 ~ 2)

# home ownership
wave_6$housing_twolevel <- case_when(wave_6$HOUSING_W6 == 1 ~ 1,
                                     wave_6$HOUSING_W6 %in% c(2,3) ~ 2)

# children
wave_6$children_twolevel <- case_when(wave_6$HH01_W6 > 0 | 
                                    wave_6$HH25_W6 > 0 |
                                    wave_6$HH612_W6 > 0 |
                                    wave_6$HH1317_W6 > 0 ~ 1,
                                  wave_6$HH01_W6 == 0 &
                                    wave_6$HH25_W6 == 0 &
                                    wave_6$HH612_W6 == 0 &
                                    wave_6$HH1317_W6 == 0 ~ 2)

# marital status
wave_6$married_twolevel <- case_when(wave_6$MARITAL_W6 == 1 ~ 1,
                                     wave_6$MARITAL_W6 %in% c(2,3,4,5,6) ~ 2)

#### ORGANIZE RESPONDENTS ####
# filter to just the respondents used in the study
panel_df <- wave_6 %>% 
  filter(!is.na(wave_6$sports_all)) %>%
  inner_join(wave_7, by = "CaseId")

# get set of respondents who dropped between the waves
attrition_df <- wave_6 %>% dplyr::filter(
  !CaseId %in% panel_df$CaseId
)

#### ASSEMBLE SUB-TABLES ####
# Assemble table containing all panel demographics
panel_df_income <- c(sum(panel_df$income_fourlevel == 1),
                   sum(panel_df$income_fourlevel == 2),
                   sum(panel_df$income_fourlevel == 3),
                   sum(panel_df$income_fourlevel == 4))*100/nrow(panel_df)

panel_df_age <- c(sum(panel_df$age_fourlevel == 1),
                sum(panel_df$age_fourlevel == 2),
                sum(panel_df$age_fourlevel == 3),
                sum(panel_df$age_fourlevel == 4))*100/nrow(panel_df)

panel_df_race <- c(sum(panel_df$race_fivelevel == 1),
                 sum(panel_df$race_fivelevel == 2),
                 sum(panel_df$race_fivelevel == 3),
                 sum(panel_df$race_fivelevel == 4),
                 sum(panel_df$race_fivelevel == 5))*100/nrow(panel_df)

panel_df_edu <- c(sum(panel_df$edu_fivelevel == 1),
                sum(panel_df$edu_fivelevel == 2),
                sum(panel_df$edu_fivelevel == 3),
                sum(panel_df$edu_fivelevel == 4),
                sum(panel_df$edu_fivelevel == 5))*100/nrow(panel_df)

panel_df_household <- c(sum(panel_df$housing_twolevel == 1),
                      sum(panel_df$housing_twolevel == 2))*100/nrow(panel_df)

panel_df_children <- c(sum(panel_df$children_twolevel == 1),
                     sum(panel_df$children_twolevel == 2))*100/nrow(panel_df)

panel_df_marital <- c(sum(panel_df$married_twolevel == 1),
                    sum(panel_df$married_twolevel == 2))*100/nrow(panel_df)

panel_df_sex <- c(sum(panel_df$sex == 1, na.rm = T),
                   sum(panel_df$sex == 2, na.rm = T))*100/nrow(panel_df)

# concatenate all of the smaller dataframes into the big one
panel_df_all <- round(c(panel_df_income, panel_df_age, 
                        panel_df_race, panel_df_edu,
                        panel_df_household, panel_df_children, 
                        panel_df_marital, panel_df_sex), 1)

# Benchmarks from Feb 2020 Current Population Survey.
benchmark <-  c(17.5, 33.1, 24.6, 24.9, #income
                29.3, 24.3, 24.9, 21.5, # age
                62.8, 11.9, 16.7, 6.4, 2.2, # race
                9.8, 28.2, 27.7, 21.8, 21.4, # education
                67.5, 32.5, # household
                33.1, 66.9, # children
                52.6, 47.4, # marital
                48.3, 51.7) # sex

group <- c("Income", "Income", "Income", "Income",
           "Age", "Age", "Age", "Age",
           "Race", "Race", "Race", "Race", "Race",
           "Education","Education","Education","Education","Education",
           "Household","Household",
           "Children","Children",
           "Marital", "Marital",
           "Sex","Sex")

rownames <- c("Less than $30,000", "$30,000 to $74,999", "$75,000 to $124,999", "More than $125,000",
              "18 - 34", "35 - 49", "50 - 64", "65+",
              "Non-Hispanic White", "Non-Hispanic Black", "Hispanic", "Non-Hispanic Asian/Pacific Islander", "Non-Hispanic Others",
              "Less than High School", "High School or Equivalent", "Some College/Associate Degree", "Bachlor's Degree", "Graduate Degree",
              "Owner Occupied", "Renter Occupied/Other",
              "With 1+ Under 18 Years", "Without Children Under 18",
              "Currently Married", "Separated/Divorced/Widowed/Single",
              "Male", "Female")

# assemble the final big table
demographics_df <- data.frame(panel_df_all, benchmark)
row.names(demographics_df) <- rownames

#### CONSTRUCT FINAL LATEX TABLE ####
kbl(demographics_df, caption = "Respondent Demographics Relative to Population Benchmarks", 
    booktabs = T,
    col.names = c("Panel Respondents", "US Population, Feb 2020"),
    format = "latex") %>% 
  kable_styling(latex_options = "hold_position") %>% 
  pack_rows("Income", 1, 4) %>% 
  pack_rows("Age", 5, 8) %>%
  pack_rows("Race/Ethnicity", 9, 13) %>%
  pack_rows("Education Status", 14, 18) %>%
  pack_rows("Household Ownership", 19, 20) %>%
  pack_rows("Children in Household", 21, 22) %>%
  pack_rows("Marital Status", 23, 24) %>%
  pack_rows("Sex", 25, 26) %>%
  footnote(general = "Percentages for panel respondents represent unweighted proportions of the respondents who participated in both waves of the panel. Population benchmark calculated based on Census Bureau Current Population Survey, February 2020. Percentages for each demographic may not sum to 100 due to rounding error.", threeparttable = T)
```

```{r table_5_attrition, echo=FALSE}
# same as above basically, you are just using the attrition DF instead
attrition_df_income <- c(sum(attrition_df$income_fourlevel == 1),
                   sum(attrition_df$income_fourlevel == 2),
                   sum(attrition_df$income_fourlevel == 3),
                   sum(attrition_df$income_fourlevel == 4))*100/nrow(attrition_df)

attrition_df_age <- c(sum(attrition_df$age_fourlevel == 1),
                sum(attrition_df$age_fourlevel == 2),
                sum(attrition_df$age_fourlevel == 3),
                sum(attrition_df$age_fourlevel == 4))*100/nrow(attrition_df)

attrition_df_race <- c(sum(attrition_df$race_fivelevel == 1),
                 sum(attrition_df$race_fivelevel == 2),
                 sum(attrition_df$race_fivelevel == 3),
                 sum(attrition_df$race_fivelevel == 4),
                 sum(attrition_df$race_fivelevel == 5))*100/nrow(attrition_df)

attrition_df_edu <- c(sum(attrition_df$edu_fivelevel == 1),
                sum(attrition_df$edu_fivelevel == 2),
                sum(attrition_df$edu_fivelevel == 3),
                sum(attrition_df$edu_fivelevel == 4),
                sum(attrition_df$edu_fivelevel == 5))*100/nrow(attrition_df)

attrition_df_household <- c(sum(attrition_df$housing_twolevel == 1),
                            sum(attrition_df$housing_twolevel == 2))*100/nrow(attrition_df)

attrition_df_children <- c(sum(attrition_df$children_twolevel == 1),
                     sum(attrition_df$children_twolevel == 2))*100/nrow(attrition_df)

attrition_df_marital <- c(sum(attrition_df$married_twolevel == 1),
                    sum(attrition_df$married_twolevel == 2))*100/nrow(attrition_df)

attrition_df_sex <- c(sum(attrition_df$sex == 1, na.rm = T),
                   sum(attrition_df$sex == 2, na.rm = T))*100/nrow(attrition_df)

attrition_df_all <- round(c(attrition_df_income, attrition_df_age,
                            attrition_df_race, attrition_df_edu,
                        attrition_df_household, attrition_df_children, 
                        attrition_df_marital, attrition_df_sex), 1)


demo_attr_df <- data.frame(panel_df_all, attrition_df_all)
row.names(demo_attr_df) <- rownames

kbl(demo_attr_df, caption = "Panel Respondent Demographics Relative to Attrition Respondent Demographics", 
    booktabs = T,
    col.names = c("Panel Respondents", "Attrition"),
    format = "latex") %>% 
  kable_styling(latex_options = "hold_position",
                full_width = T) %>% 
  pack_rows("Income", 1, 4) %>% 
  pack_rows("Age", 5, 8) %>%
  pack_rows("Race/Ethnicity", 9, 13) %>%
  pack_rows("Education Status", 14, 18) %>%
  pack_rows("Household Ownership", 19, 20) %>%
  pack_rows("Children in Household", 21, 22) %>%
  pack_rows("Marital Status", 23, 24) %>%
  pack_rows("Sex", 25, 26) %>%
  footnote(general = "Percentages for attrition represent unweighted proportions of respondents who participated in the first wave of the panel but not the second. Percentages for panel respondents represent unweighted proportions of the respondents who participated in both waves of the panel. Percentages for each demographic may not sum to 100 due to rounding error.")
```

```{r fig_6_sportsbyparty, echo=FALSE}}
# descriptively show levels of sports watching by party

# Convert PID measure to 3 parties
wave_6$party3 <- case_when(
  wave_6$PARTYID5_W6 %in% c(1,2) ~ "Democrat",
  wave_6$PARTYID5_W6 %in% c(3) ~ "Independent",
  wave_6$PARTYID5_W6 %in% c(4,5) ~ "Republican"
)

sports_df <- wave_6 %>% select(
  football, basketball, baseball, soccer, hockey, autoracing, tennis,
  golf, boxingmma, party3)

all_sports_views <- sports_df %>%
  pivot_longer(!party3, names_to = "Sport", values_to = "Viewership") %>%
  group_by(Sport) %>%
  dplyr::summarise(mean = mean(Viewership, na.rm = TRUE))

# reorder factor levels for our sports so higher viewership ones come first
sports_factor_levels <- 
  pull(all_sports_views[order(all_sports_views$mean,decreasing = TRUE),"Sport"])

party_sports_summary <- sports_df %>%
  # drop respondents who do not specify party
  filter(party3 %in% c("Democrat", "Independent", "Republican")) %>%
  pivot_longer(!party3, names_to = "Sport", values_to = "Viewership") %>%
  group_by(party3, Sport) %>%
  dplyr::summarise(mean = mean(Viewership, na.rm = TRUE),
            sd = sd(Viewership, na.rm = TRUE),
            n = n()) %>%
  mutate(se = sd / sqrt(n),
         lower.ci = 
           mean - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = 
           mean + qt(1 - (0.05 / 2), n - 1) * se)

party_sports_summary$Sport <- 
  factor(party_sports_summary$Sport, levels = sports_factor_levels)

# spit out image
png(filename = "img/viewership_by_sport.png",
    width = 20, height = 12, units = "cm", type = "cairo-png", res = 300)
ggplot(data = party_sports_summary, 
                aes(x = Sport, 
                    color = party3,
                    y = mean,
                    ymax = upper.ci,
                    ymin = lower.ci)) +
  labs(x = "", y = "Mean Viewership (1-5)", color = "Party",
       caption ="Error bars represent 95% confidence intervals") +
  geom_point(position = position_dodge(width = 0.1)) +
  scale_color_manual(values = c("dodgerblue3","forestgreen","firebrick3")) +
  geom_errorbar(width = 0, position = position_dodge(width = 0.1)) +
  theme_bw(15) +
  guides(color = guide_legend(position = "inside")) +
  theme(legend.position = c(0.8, 0.7), # c(0,0) left bottom, c(1,1) right top.
  legend.background = element_rect(fill = "white", colour = NA))
dev.off()
```

```{r fig_7_sensitivity, echo=FALSE}}
# sensemakr doesn't seem to be compatible with estimatr, so I'm using feols

# This was the model with the weakest relationship between watching sports and electionlegit, 
# so we are using it as the most conservative test of the theory
sense_feols <- feols(electionlegit ~ 
                       team_sports_scaled*wave +
                       age_7*wave + 
                       female*wave +
                       nonwhite*wave +
                       education_4*wave +
                      income_18*wave|
                      CaseId, 
                    data = trump_only)

demo_feols.sensitivity <- 
  sensemakr(sense_feols, 
            treatment = "team_sports_scaled:wave",
            benchmark_covariates = c("wave:female"), # biggest confound
            kd = 1:3,
            ky = 1:3, 
            q = 1,
            alpha = 0.05, 
            reduce = TRUE)

# output Latex code
ovb_minimal_reporting(demo_feols.sensitivity)

# output figure
png(filename = "img/ovb_sens.png",
    width = 12, height = 12, units = "cm", type = "cairo-png", res = 300)
plot(demo_feols.sensitivity, lim = .11, lim.y = .11, 
     xlab = expression(paste("Partial ", R^2, " of confounder(s) with sports watching")),
     ylab = expression(paste("Partial ", R^2, " of confounder(s) with ", Delta, " perceived election legitimacy")),
     nlevels = 6)
dev.off()
```

```{r fig_8_results_by_sport, echo=FALSE}}
# make our figure where we show the results of the models for each sport

# scale viewership for each sport to enable comparisons
wave_67$football_scaled <- scale(wave_67$football)
wave_67$baseball_scaled <- scale(wave_67$baseball)
wave_67$basketball_scaled <- scale(wave_67$basketball)
wave_67$hockey_scaled <- scale(wave_67$hockey)
wave_67$soccer_scaled <- scale(wave_67$soccer)
wave_67$tennis_scaled <- scale(wave_67$tennis)
wave_67$boxingmma_scaled <- scale(wave_67$boxingmma)
wave_67$golf_scaled <- scale(wave_67$golf)
wave_67$autoracing_scaled <- scale(wave_67$autoracing)

# define individual sports versus team sports
wave_67$individual_sports <- 
  rowSums(wave_67[,c("tennis","boxingmma","golf","autoracing")], na.rm = T)
# already scaled team sports so just need individual ones
wave_67$individual_sports_scaled <- scale(wave_67$individual_sports) 
wave_67$individual_sports_mean <- 
  rowMeans(wave_67[,c("tennis","boxingmma","golf","autoracing")], na.rm = T)
wave_67$team_sports_mean <- 
  rowMeans(wave_67[,c("football","baseball","basketball",
                      "hockey","soccer")], na.rm = T)

# standard definition of trump voters
trump_only <- wave_67 %>% filter(QVOTE_W6 == 1) %>% 
  filter(team_sports > 0) %>%
  filter(pre_electionlegit > 0 & post_electionlegit > 0)

#### DEFINTE FUNCTION FOR COLLECTING MODEL COEFFICIENTS ####
collect_mean_sd <-function(model, name){
  mean = model$coefficients[2]
  sd = model$std.error[2]
  conf.high = mean + qnorm(.975)*sd
  conf.low = mean - qnorm(.975)*sd
  return(c(mean, sd, conf.low, conf.high, name))
}

#### RUN MODELS FOR EVERY SPORT ####
football_model <-
  lm_robust(electionlegit~
              wave +
              wave:football_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
football_row <- collect_mean_sd(football_model, "Football")

baseball_model <-
  lm_robust(electionlegit~
              wave +
              wave:baseball_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
baseball_row <- collect_mean_sd(baseball_model, "Baseball")

basketball_model <-
  lm_robust(electionlegit~
              wave +
              wave:basketball_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
basketball_row <- collect_mean_sd(basketball_model, "Basketball")

hockey_model <-
  lm_robust(electionlegit~
              wave +
              wave:hockey_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
hockey_row <- collect_mean_sd(hockey_model, "Hockey")

soccer_model <-
  lm_robust(electionlegit~
              wave +
              wave:soccer_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
soccer_row <- collect_mean_sd(soccer_model, "Soccer")

tennis_model <-
  lm_robust(electionlegit~
              wave +
              wave:tennis_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
tennis_row <- collect_mean_sd(tennis_model, "Tennis")

boxingmma_model <-
  lm_robust(electionlegit~
              wave +
              wave:boxingmma_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
boxingmma_row <- collect_mean_sd(boxingmma_model, "Boxing/MMA")

golf_model <-
  lm_robust(electionlegit~
              wave +
              wave:golf_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
golf_row <- collect_mean_sd(golf_model, "Golf")

autoracing_model <-
  lm_robust(electionlegit~
              wave +
              wave:autoracing_scaled,
            data = trump_only,
            fixed_effects = ~CaseId,
            se_type = "HC1")
autoracing_row <- collect_mean_sd(autoracing_model, "Auto Racing")

#### BUILD OVERALL DF ####
allsports_df <- 
  rbind(football_row, baseball_row, basketball_row, hockey_row, soccer_row, tennis_row, boxingmma_row, golf_row, autoracing_row)
allsports_df <- as.data.frame(allsports_df)
colnames(allsports_df) <- c("mean", "sd", "conf.low", "conf.high", "name")
allsports_df$mean <- as.numeric(allsports_df$mean)
allsports_df$sd <- as.numeric(allsports_df$sd)
allsports_df$conf.low <- as.numeric(allsports_df$conf.low)
allsports_df$conf.high <- as.numeric(allsports_df$conf.high)

allsports_df$ordered_name <-
  factor(allsports_df$name, levels=allsports_df[order(allsports_df$mean), "name"])

#### PLOT ####
allsports_coefs <-
  ggplot(allsports_df[order(allsports_df$mean),], aes(x=ordered_name, y=mean, ymin=conf.low, ymax=conf.high)) +
  geom_pointrange() +
  coord_flip() +
  theme_minimal() +
  geom_hline(yintercept = 0, color = "red") +
  labs(x = "Sport",
       y = "Coefficient",
       caption = "Coefficients represent the predicted change in perceived election legitimacy among Trump supporters on a 1-4 scale given a one standard deviation increase in watching a particular sport.") +
  theme_bw() +
  theme(plot.caption = element_textbox_simple(margin = margin(8, 0, 0, 0), size = 10))

png(filename = "img/sports_coef_all.png",
    width = 16, height = 12, units = "cm", type = "cairo-png", res = 300)
allsports_coefs
dev.off()
```

```{r fig_9_grouped_by_sport, echo=FALSE}}
individual_sports_model <- lm_robust(electionlegit~
                                       wave +
                                       wave:individual_sports_scaled,
                                     data = trump_only,
                                     fixed_effects = ~CaseId,
                                     se_type = "HC1")
individual_row <- collect_mean_sd(individual_sports_model, "Individual Sports")

team_sports_model <- lm_robust(electionlegit~
                                       wave +
                                       wave:team_sports_scaled,
                                     data = trump_only,
                                     fixed_effects = ~CaseId,
                                     se_type = "HC1")
team_row <- collect_mean_sd(team_sports_model, "Team Sports")

aggregatesports_df <- 
  rbind(individual_row, team_row)
aggregatesports_df <- as.data.frame(aggregatesports_df)
colnames(aggregatesports_df) <- c("mean", "sd", "conf.low", "conf.high", "name")
aggregatesports_df$mean <- as.numeric(aggregatesports_df$mean)
aggregatesports_df$sd <- as.numeric(aggregatesports_df$sd)
aggregatesports_df$conf.low <- as.numeric(aggregatesports_df$conf.low)
aggregatesports_df$conf.high <- as.numeric(aggregatesports_df$conf.high)

aggregate_sports_coefs <-
  ggplot(aggregatesports_df, aes(x=name, y=mean, ymin=conf.low, ymax=conf.high)) +
  geom_pointrange() +
  coord_flip() +
  theme_minimal() +
  geom_hline(yintercept = 0, color = "red") +
  labs(x = "",
       y = "Coefficient",
       caption = "Coefficients represent the predicted change in perceived election legitimacy among Trump supporters on a 1-4 scale given a one standard deviation increase in watching a set of sports.") +
  theme_bw() +
  theme(plot.caption = element_textbox_simple(margin = margin(8, 0, 0, 0), size = 10))

png(filename = "img/agg_sports_coef_comparison.png",
    width = 16, height = 12, units = "cm", type = "cairo-png", res = 300)
aggregate_sports_coefs
dev.off()
```

```{r table_6_experiment_table, echo=FALSE}}
# This table shows the experiment results in table rather than figure form.
experiment_cm <- 
    c("sports_concession" = "Sports Concession",
    "political_concession" = "Political Concession",
    "trump_therm" = "Trump Feeling Thermometer",
    "age" = "Age",
    "education" = "Education")

modelsummary(models = 
               list("Perceived Election Legitimacy" = experiment.model),
             title = "Effect of Legitimating Rituals on Perceived Election
             Legitimacy",
             estimate = "{estimate} ({std.error}) {stars}",
             statistic = NULL,
             align = "ld",
             fmt = fmt_sprintf("%.3f"),
             notes = list("* p < .05, ** p < .01, *** p < .001"),
             stars = c("*" = .05, "**" = .01, "***" = .001), 
             coef_map = experiment_cm,
             gof_map = "nobs",
             escape=FALSE,
             output = "latex")
```

```{r figure_10_raw_experiment, echo=FALSE}}
# this figure shows the raw means by condition for the experiment

# split our two factors into 4 conditions
data_merged$sports_politics <- 
  interaction(data_merged$political_treatment, data_merged$sports_treatment)

# we need to know what condition in the 2x2 each respondent was in
data_merged$condition_4 <-
  case_when(data_merged$sports_politics == "0.0" ~ "No Concession",
            data_merged$sports_politics == "1.0" ~ "Politics Concession Only",
            data_merged$sports_politics == "0.1" ~ "Sports Concession Only",
            data_merged$sports_politics == "1.1" ~ "Sports and Politics Concessions")

# get means and confidence intervals for each condition
toplot_4conditions <- 
  data_merged %>% select(condition_4, electionlegit) %>%
  group_by(condition_4) %>%
  dplyr::summarise(mean.dv = mean(electionlegit, na.rm = TRUE),
          sd.dv = sd(electionlegit, na.rm = TRUE),
          n.dv = n()) %>%
  dplyr::mutate(se.dv = sd.dv / sqrt(n.dv),
         lower.ci.dv = 
           mean.dv - qt(1 - (0.05 / 2), n.dv - 1) * se.dv,
         upper.ci.dv = 
           mean.dv + qt(1 - (0.05 / 2), n.dv - 1) * se.dv,
         lower.se.dv = mean.dv - se.dv,
         upper.se.dv = mean.dv + se.dv)

# change the factor order for the table
toplot_4conditions$condition_4 <- 
  factor(toplot_4conditions$condition_4,
            levels = c("No Concession", "Sports Concession Only",
                       "Politics Concession Only", "Sports and Politics Concessions"),
            labels = c("No Concession", "Sports Concession \nOnly",
                       "Politics Concession \nOnly", "Sports and \nPolitics Concessions"))

# dump to image
png(filename = "img/four_conditions.png",
    width = 20, height = 12, units = "cm", type = "cairo-png", res = 300)
ggplot(data = toplot_4conditions, aes(x = condition_4, y = mean.dv)) +
  labs(x = "", y = "Perceived Election Legitimacy (1-4)", 
       caption = "Error bars represent 95% confidence intervals") +
  geom_point(size = 3) +
  geom_errorbar(aes(ymax = upper.ci.dv,
                    ymin = lower.ci.dv),
                width = 0) +
  theme_bw()
dev.off()
```

```{r table_7_byparty, echo=FALSE}
# run model on subsamples by party
experiment.dems <- lm_robust(electionlegit ~ sports_concession +
                                political_concession +
                                trump_therm + age + education,
                              data = data_merged[data_merged$party == "Democrat",], 
                              se="HC2")
experiment.reps <- lm_robust(electionlegit ~ sports_concession +
                                political_concession +
                                trump_therm + age + education,
                             data = data_merged[data_merged$party == "Republican",], 
                             se="HC2")

# modelsummary table
modelsummary(models = 
               list("Perceived Election Legitimacy (Democrats)" = experiment.dems,
                    "Perceived Election Legitimacy (Republicans)" = experiment.reps),
             estimate = "{round(estimate, 3)} ({round(std.error, 3)}) {stars}",
             fmt = NULL,
             statistic = NULL,
             stars = c("+" = .1, "*" = .05, "**" = .01, "***" = .001), 
             notes = list("* p < .05, ** p < .01, *** p < .001"),
             coef_map = experiment_cm,
             gof_map = "nobs",
             output = "latex")
```

```{r figure_11_experiment_byparty, echo=FALSE}}
# setup figure based on previously run models
politics_exp_by_party <-
  Rmisc::summarySE(data_merged, 
          measurevar = "electionlegit", 
          groupvars = c("political_concession", "party"),
          na.rm = T)

politics_exp_by_party$political_concession <-
  as.factor(politics_exp_by_party$political_concession)

sports_exp_by_party <-
  Rmisc::summarySE(data_merged, 
          measurevar = "electionlegit", 
          groupvars = c("sports_concession", "party"),
          na.rm = T)

sports_exp_by_party$sports_concession <-
  as.factor(sports_exp_by_party$sports_concession)

# merge our two datasets
pol_exp_tomerge <- politics_exp_by_party %>% 
  dplyr::rename(treatment = political_concession) %>%
  mutate(type = "Political Concession")
sports_exp_tomerge <- sports_exp_by_party %>%
  dplyr::rename(treatment = sports_concession) %>%
  mutate(type = "Sports Concession")

combined_exp_by_party <- rbind(pol_exp_tomerge, sports_exp_tomerge)

# plot the results
res_split_by_party <-
  ggplot(combined_exp_by_party, 
       aes(x = treatment, y = electionlegit, color = party)) +
  geom_point() +
  geom_errorbar(aes(ymin = electionlegit - ci, ymax = electionlegit + ci), width = 0) +
  scale_color_manual(values = c("blue", "red")) +
  labs(x = "",
       y = "Perceived Election Legitimacy (1-4)",
       color = "Party",
       caption = "Points represent raw means for pooled conditions, split by party. Points and confidence intervals are unadjusted for any covariates.") +
  scale_x_discrete(labels = c("No", "Yes")) +
  facet_wrap(~type) +
  theme_bw() +
  theme(plot.caption = element_textbox_simple(margin = margin(8, 0, 0, 0), size = 10))


png(filename = "img/exp_by_party.png",
    width = 20, height = 10, units = "cm", type = "cairo-png", res = 300)
res_split_by_party
dev.off()
```

```{r table_8_attncheck, echo=FALSE}}
talley_check <- data_merged %>% 
  filter(data_merged$cand_choice == "David Talley (Democrat)" &
           data_merged$politics.check != "")
talley <- round(prop.table(
  table(talley_check$politics.check, talley_check$cand_choice)), 3) * 100

dobson_check <- data_merged %>% 
  filter(data_merged$cand_choice == "Thomas Dobson (Republican)" &
           data_merged$politics.check != "")
dobson <- round(prop.table(
  table(dobson_check$politics.check, dobson_check$cand_choice)), 3) * 100

kbl(cbind(talley, dobson),booktabs = T, 
    caption = "Percent Responses by Winner Name",
    format = "latex")
```

```{r figure_12_attncheck, echo=FALSE}}
# check which videos each respondent reported watching
data_merged$novideos <- 
  ifelse(data_merged$attention.check == "I did not watch any videos", 1, 0)
data_merged$sportsvideo <- 
  ifelse(data_merged$attention.check == "A video about sports", 1, 0)
data_merged$politicsvideo <- 
  ifelse(data_merged$attention.check == "A video about politics", 1, 0)
data_merged$bothvideos <- 
  ifelse(data_merged$attention.check == "A video about sports and a video about politics", 1, 0)

# show how often people were correct (almost always, good!)
manipulation_table <- data_merged %>% group_by(condition_4) %>%
  dplyr::summarise(mean.novideos = mean(novideos, na.rm = TRUE),
            sd.novideos = sd(novideos, na.rm = TRUE),
            n.novideos = n(),
            mean.sportsvideo = mean(sportsvideo, na.rm = TRUE),
            sd.sportsvideo = sd(sportsvideo, na.rm = TRUE),
            n.sportsvideo = n(),
            mean.politicsvideo = mean(politicsvideo, na.rm = TRUE),
            sd.politicsvideo = sd(politicsvideo, na.rm = TRUE),
            n.politicsvideo = n(),
            mean.bothvideos = mean(bothvideos, na.rm = TRUE),
            sd.bothvideos = sd(bothvideos, na.rm = TRUE),
            n.bothvideos = n()) %>% 
  select(condition_4, mean.novideos, mean.sportsvideo, mean.politicsvideo, mean.bothvideos)

long_manip_table <- manipulation_table %>% pivot_longer(
  !condition_4, names_to = "variable", values_to = "value"
)

long_manip_table$variable <- 
  factor(long_manip_table$variable,
            levels = c("mean.novideos", "mean.sportsvideo", "mean.politicsvideo", "mean.bothvideos"),
            labels = c("No Videos", "Sports Video Only", "Politics Video Only", "Both Videos"))
long_manip_table$condition_4 <- 
  factor(long_manip_table$condition_4,
         levels = c("No Concession", "Sports Concession Only", "Politics Concession Only", "Sports and Politics Concessions"),
         labels = c("No Concession", "Sports \nConcession Only", "Politics \nConcession Only", "Sports and \nPolitics Concessions"))

# dump to image
png(filename = "img/attention_check.png",
    width = 20, height = 12, units = "cm", type = "cairo-png", res = 300)
ggplot(long_manip_table, aes(x = condition_4, y = value, color = variable)) +
  geom_point(size = 2) +
  labs(x = "Respondent Condition", y = "Proportion Responding",
       color = "Respondent Answer to \nVideo(s) Watched") +
  theme_bw()
dev.off()
```

```{r table_9_anova, echo=FALSE}}
winteraction <- anova_test(electionlegit~political_concession +sports_concession +
                              political_concession:sports_concession +
                              trump_therm + age + education,
                            data = data_merged)

kbl(winteraction, booktabs = T, format = "latex")

```

END OF DOCUMENT
